library(tidyverse)
## ── Attaching packages ─────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.2.1     ✔ purrr   0.3.3
## ✔ tibble  2.1.3     ✔ dplyr   0.8.3
## ✔ tidyr   1.0.0     ✔ stringr 1.4.0
## ✔ readr   1.3.1     ✔ forcats 0.4.0
## ── Conflicts ────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(ggridges)
# Default figure options for every knitr chunk: figure width, aspect ratio
# and the width the figure occupies in the rendered output.
knitr::opts_chunk$set(fig.width = 12, fig.asp = 0.6, out.width = "90%")

# Pull 2017 precipitation and min/max temperature records for three weather
# stations, attach a readable station name, and rescale the temperatures.
weather_df <-
  rnoaa::meteo_pull_monitors(
    c("USW00094728", "USC00519397", "USS0023B17S"),
    var = c("PRCP", "TMIN", "TMAX"),
    date_min = "2017-01-01",
    date_max = "2017-12-31"
  ) %>%
  mutate(
    # Create a new variable called `name` from the station `id`:
    #   new_variable = recode(existing_variable, existing_value = "new value")
    name = recode(
      id,
      USW00094728 = "CentralPark_NY",
      USC00519397 = "Waikiki_HA",
      USS0023B17S = "Waterhole_WA"
    ),
    # NOTE(review): presumably the raw values are tenths of a degree Celsius,
    # hence the division by 10 -- confirm against the data source.
    tmin = tmin / 10,
    tmax = tmax / 10
  ) %>%
  # Put the two identifying columns first, keep everything else after them.
  select(name, id, everything())
## Registered S3 method overwritten by 'hoardr':
##   method           from
##   print.cache_info httr
## file path:          /Users/rachellee/Library/Caches/rnoaa/ghcnd/USW00094728.dly
## file last updated:  2020-01-09 20:48:44
## file min/max dates: 1869-01-01 / 2020-01-31
## file path:          /Users/rachellee/Library/Caches/rnoaa/ghcnd/USC00519397.dly
## file last updated:  2020-03-17 01:46:14
## file min/max dates: 1965-01-01 / 2020-03-31
## file path:          /Users/rachellee/Library/Caches/rnoaa/ghcnd/USS0023B17S.dly
## file last updated:  2020-03-17 01:46:18
## file min/max dates: 1999-09-01 / 2020-03-31
# Print the tibble to inspect its columns and first rows.
weather_df
## # A tibble: 1,095 x 6
##    name           id          date        prcp  tmax  tmin
##    <chr>          <chr>       <date>     <dbl> <dbl> <dbl>
##  1 CentralPark_NY USW00094728 2017-01-01     0   8.9   4.4
##  2 CentralPark_NY USW00094728 2017-01-02    53   5     2.8
##  3 CentralPark_NY USW00094728 2017-01-03   147   6.1   3.9
##  4 CentralPark_NY USW00094728 2017-01-04     0  11.1   1.1
##  5 CentralPark_NY USW00094728 2017-01-05     0   1.1  -2.7
##  6 CentralPark_NY USW00094728 2017-01-06    13   0.6  -3.8
##  7 CentralPark_NY USW00094728 2017-01-07    81  -3.2  -6.6
##  8 CentralPark_NY USW00094728 2017-01-08     0  -3.8  -8.8
##  9 CentralPark_NY USW00094728 2017-01-09     0  -4.9  -9.9
## 10 CentralPark_NY USW00094728 2017-01-10     0   7.8  -6  
## # … with 1,085 more rows
##############
#SCATTER PLOT#
##############


# Basic scatterplot: data and aesthetic mapping are passed straight to ggplot().
ggplot(data = weather_df, mapping = aes(x = tmin, y = tmax)) +
  geom_point()
## Warning: Removed 15 rows containing missing values (geom_point).

# Same plot, but the data frame is piped into ggplot().
weather_df %>%
  ggplot(mapping = aes(x = tmin, y = tmax)) +
  geom_point()
## Warning: Removed 15 rows containing missing values (geom_point).

# Store the base plot (data + mapping, no layers yet) in an object ...
plot_weather <-
  weather_df %>%
  ggplot(mapping = aes(x = tmin, y = tmax))
# ... and then add a layer to it; the resulting plot is printed.
plot_weather + geom_point()
## Warning: Removed 15 rows containing missing values (geom_point).

######################
#Advanced Scatterplot#
######################


# Min vs. max temperature, coloured by station, one panel per station.
weather_df %>%
  ggplot(aes(x = tmin, y = tmax, color = name)) +
  geom_point(alpha = 0.5) +   # alpha controls point transparency
  geom_smooth(se = FALSE) +   # smooth trend line without the error band
  facet_grid(. ~ name)        # separate panel (column) for each `name`
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 15 rows containing non-finite values (stat_smooth).

## Warning: Removed 15 rows containing missing values (geom_point).

# Maximum temperature across the year; point size is mapped to precipitation.
weather_df %>%
  ggplot(aes(x = date, y = tmax, color = name)) +
  geom_point(aes(size = prcp), alpha = 0.5) +
  geom_smooth(se = FALSE) +
  facet_grid(. ~ name)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 3 rows containing non-finite values (stat_smooth).
## Warning: Removed 3 rows containing missing values (geom_point).

###################################################
# Learning Assessment: Write a code chain that starts with weather_df;
# focuses only on Central Park, converts temperatures to Fahrenheit,
# makes a scatterplot of min vs. max temperature, and overlays a linear
# regression line (using options in geom_smooth()).
###################################################


# Central Park only: convert temperatures to Fahrenheit, then plot minimum
# against maximum temperature with a fitted straight line on top.
weather_df %>%
  filter(name == "CentralPark_NY") %>%
  mutate(
    tmax_f = tmax * (9 / 5) + 32,
    tmin_f = tmin * (9 / 5) + 32
  ) %>%
  ggplot(aes(x = tmin_f, y = tmax_f)) +
  geom_point(alpha = 0.5) +
  # method = "lm" requests a linear regression line instead of the default
  geom_smooth(method = "lm", se = FALSE)

#Smooth Curve
    #same plots
# Aesthetics declared once in ggplot(), so every layer would inherit them.
ggplot(weather_df, aes(x = date, y = tmax, color = name)) +
  geom_smooth(se = FALSE)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 3 rows containing non-finite values (stat_smooth).

# Same picture, but the aesthetics are declared inside the layer itself.
ggplot(weather_df) +
  geom_smooth(aes(x = date, y = tmax, color = name), se = FALSE)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 3 rows containing non-finite values (stat_smooth).

#Scatter Plots

  #why are these two different?

# `color` OUTSIDE aes(): a fixed setting, so every point is drawn in blue.
ggplot(weather_df) +
  geom_point(aes(x = tmax, y = tmin), color = "blue")
## Warning: Removed 15 rows containing missing values (geom_point).

# `color` INSIDE aes(): "blue" is treated as a data value (a new one-level
# variable) that is mapped to the colour scale, not as the colour to draw with.
ggplot(weather_df) +
  geom_point(aes(x = tmax, y = tmin, color = "blue"))
## Warning: Removed 15 rows containing missing values (geom_point).

#Histogram

# Histogram of maximum temperature using the default binning.
weather_df %>%
  ggplot(aes(x = tmax)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 3 rows containing non-finite values (stat_bin).

# One set of bars per station, placed side by side, with 2-unit-wide bins.
weather_df %>%
  ggplot(aes(x = tmax, fill = name)) +
  geom_histogram(binwidth = 2, position = "dodge")
## Warning: Removed 3 rows containing non-finite values (stat_bin).

#Density Curve
# Single density curve for all stations pooled together.
weather_df %>%
  ggplot(aes(x = tmax)) +
  geom_density(color = "blue", alpha = 0.4, adjust = 0.5)
## Warning: Removed 3 rows containing non-finite values (stat_density).

# One filled density curve per station.
weather_df %>%
  ggplot(aes(x = tmax, fill = name)) +
  geom_density(color = "blue", alpha = 0.4, adjust = 0.5)
## Warning: Removed 3 rows containing non-finite values (stat_density).

 # Boxplots of maximum temperature, one box per station
 weather_df %>%
   ggplot(aes(x = name, y = tmax)) +
   geom_boxplot()
## Warning: Removed 3 rows containing non-finite values (stat_boxplot).

 # Violin plots of maximum temperature, with each station's median as a point
 weather_df %>%
   ggplot(aes(x = name, y = tmax)) +
   geom_violin(aes(fill = name), color = "blue", alpha = 0.5) +
   # NOTE(review): `fun.y` matches the ggplot2 3.2.1 attached above; later
   # ggplot2 releases rename this argument to `fun` -- confirm before upgrading.
   stat_summary(fun.y = median, geom = "point", color = "blue", size = 4)
## Warning: Removed 3 rows containing non-finite values (stat_ydensity).
## Warning: Removed 3 rows containing non-finite values (stat_summary).

 # Ridge plot: one density of maximum temperature per station
 weather_df %>%
   ggplot(aes(x = tmax, y = name)) +
   geom_density_ridges(scale = 0.85)
## Picking joint bandwidth of 1.84
## Warning: Removed 3 rows containing non-finite values (stat_density_ridges).

#learning assessment
 
 # Histogram of precipitation, bars for the stations placed side by side
 weather_df %>%
   ggplot(aes(x = prcp, fill = name)) +
   geom_histogram(binwidth = 80, position = "dodge")
## Warning: Removed 3 rows containing non-finite values (stat_bin).

 # Density curves of precipitation, filled by station. The two versions differ
 # only in where `fill` is mapped, the alpha value and the outline colour.

 # Version 1: `fill` mapped in ggplot(); blue outline, alpha 0.4
 weather_df %>%
   ggplot(aes(x = prcp, fill = name)) +
   geom_density(color = "blue", alpha = 0.4, adjust = 40)
## Warning: Removed 3 rows containing non-finite values (stat_density).

 # Version 2: `fill` mapped inside the layer; default outline, alpha 0.5
 weather_df %>%
   ggplot(aes(x = prcp)) +
   geom_density(aes(fill = name), adjust = 40, alpha = 0.5)
## Warning: Removed 3 rows containing non-finite values (stat_density).

 # Ridge plot of precipitation, one ridge per station
 weather_df %>%
   ggplot(aes(x = prcp, y = name)) +
   geom_density_ridges(scale = 0.85)
## Picking joint bandwidth of 4.61
## Warning: Removed 3 rows containing non-finite values (stat_density_ridges).

 # Boxplot of precipitation, one box per station
 weather_df %>%
   ggplot(aes(x = name, y = prcp)) +
   geom_boxplot()
## Warning: Removed 3 rows containing non-finite values (stat_boxplot).

 # Ridge plot restricted to rows with some precipitation (prcp > 0)
 wet_days_ridges <- function(df) {
   df %>%
     filter(prcp > 0) %>%
     ggplot(aes(x = prcp, y = name)) +
     geom_density_ridges(scale = 0.85)
 }
 wet_days_ridges(weather_df)
## Picking joint bandwidth of 19.7

############################
#Saving and embedding plots#
############################
 
# Build the scatterplot once and keep it in an object ...
weather_plot <-
  weather_df %>%
  ggplot(aes(x = tmin, y = tmax)) +
  geom_point(aes(color = name), alpha = 0.5)

# ... then write that object to a PDF with an explicit width and height.
ggsave(
  filename = "weather_plot.pdf",
  plot = weather_plot,
  width = 8,
  height = 5
)
## Warning: Removed 15 rows containing missing values (geom_point).